package mosdi.subcommands;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import java.util.Locale;

import mosdi.fa.IIDTextModel;
import mosdi.fa.TextModel;
import mosdi.matching.PositionWeightMatrix;
import mosdi.util.Alphabet;
import mosdi.util.Log;
import mosdi.util.NamedSequence;
import mosdi.util.SequenceUtils;

/**
 * Subcommand {@code pfm-to-pwm}: reads a position frequency matrix (PFM) from
 * a file, builds a {@link PositionWeightMatrix} from it using a background
 * character distribution and optional pseudo-counts, and writes the resulting
 * matrix to an output file.
 *
 * <p>The output contains one line per alphabet character and one
 * tab-separated entry per matrix column.</p>
 */
public class PfmToPwmSubcommand extends Subcommand {
	@Override
	public String usage() {
		return
		super.usage()+" [options] <pfm-file> <output-file>\n" +
		"\n" +
		"  <pfm-file> a file containing a position frequency matrix (e.g., as\n" +
		"             output by mosdi-util generate-pfm)\n" +
		"\n" +
		"Options:\n" +
		"  -p <pseudo-counts>: add pseudo-counts to each PFM entry\n" +
		"  -t <fasta-file>: print threshold such that all sequences in fasta file\n" +
		"                   contain at least one match.\n" +
		"  -c <char-dist-file>: file containing character frequencies (e.g., as output\n" +
		"                       by mosdi-util count-qgrams for q=1) to be used as\n" +
		"                       background distribution (default: use uniform model).\n" +
		"  -d <digits>: give output with limited precision.";
	}

	@Override
	public String description() {
		return
		"Given a position frequency matrix (PFM) and a character distribution, " +
		"outputs a position weight matrix (PWM) containing log-odds scores.";
	}

	@Override
	public String name() {
		return "pfm-to-pwm";
	}

	/**
	 * Executes the subcommand.
	 *
	 * @param args command line arguments: options followed by the PFM file
	 *             name and the output file name
	 * @return 0 on success, 1 if the character distribution file, the output
	 *         file or the FASTA file could not be processed
	 */
	@Override
	public int run(String[] args) {
		// Presumably "2" is the number of mandatory arguments -- confirm in Subcommand.
		parseOptions(args, 2, "p:t:c:d:");

		// Option dependencies
		// -- none --

		// Mandatory arguments
		String pfmFilename = getStringArgument(0);
		String outputFilename = getStringArgument(1);

		// Options
		double pseudoCounts = getDoubleOption("p", 0.0d);
		String fastaFilename = getStringOption("t", null);
		String charDistFilename = getStringOption("c", null);
		// -1 is the "option not given" sentinel: print entries with full precision.
		int digits = getNonNegativeIntOption("d", -1);

		Alphabet alphabet = Alphabet.getDnaAlphabet();

		// Background model: taken from the -c file if given, otherwise the
		// model created by IIDTextModel(alphabet size).
		IIDTextModel textModel = null;
		if (charDistFilename!=null) {
			try {
				TextModel t = SequenceUtils.buildTextModelFromQGramFile(charDistFilename, alphabet);
				if (t instanceof IIDTextModel) {
					textModel = (IIDTextModel)t;
				} else {
					Log.errorln("Invalid character distribution file.");
					return 1;
				}
			} catch (Exception e) {
				Log.errorln("Invalid character distribution file: "+e.getMessage());
				return 1;
			}
		} else {
			textModel = new IIDTextModel(alphabet.size());
		}
		double[] charDist = textModel.getCharacterDistribution();

		double[][] pfm = SequenceUtils.readPositionMatrix(pfmFilename, alphabet);
		PositionWeightMatrix pwm = new PositionWeightMatrix(pfm, charDist, pseudoCounts);

		String formatString = null;
		if (digits!=-1) formatString = "%."+digits+"f";
		String matrixText = renderMatrix(pwm, alphabet.size(), formatString);

		try {
			writeTextFile(outputFilename, matrixText);
		} catch (IOException e) {
			Log.errorln("Error writing output: "+e.getMessage());
			return 1;
		}
		Log.printf(Log.Level.VERBOSE, "Min/max score: %e / %e\n", pwm.minScore(), pwm.maxScore());

		if (fastaFilename!=null) {
			List<NamedSequence> namedSequences = null;
			try {
				namedSequences = SequenceUtils.readFastaFile(fastaFilename, alphabet, true);
			} catch (Exception e) {
				Log.errorln(e.getMessage());
				return 1;
			}
			// The threshold is the smallest of the per-sequence maximal scores.
			// Note: stays at +infinity if the FASTA file yields no sequences.
			double t = Double.POSITIVE_INFINITY;
			for (NamedSequence ns : namedSequences) {
				t = Math.min(t, pwm.maxScore(ns.getSequence()));
			}
			Log.printf(Log.Level.STANDARD, "Score threshold: %e\n", t);
		}

		return 0;
	}

	/**
	 * Renders the matrix as text: one line per row, entries separated by tabs.
	 *
	 * @param pwm          matrix whose entries are printed
	 * @param rows         number of rows (alphabet characters) to print
	 * @param formatString format applied to each entry, or {@code null} to
	 *                     print entries using their default string conversion
	 * @return the rendered matrix, each line terminated by {@code '\n'}
	 */
	private static String renderMatrix(PositionWeightMatrix pwm, int rows, String formatString) {
		StringBuilder sb = new StringBuilder();
		for (int c=0; c<rows; ++c) {
			for (int column=0; column<pwm.width(); ++column) {
				if (column>0) sb.append('\t');
				if (formatString!=null) {
					// Locale.ROOT keeps '.' as decimal separator regardless of the
					// platform locale, consistent with the unformatted branch.
					sb.append(String.format(Locale.ROOT, formatString, pwm.getEntry(c,column)));
				} else {
					sb.append(pwm.getEntry(c,column));
				}
			}
			sb.append('\n');
		}
		return sb.toString();
	}

	/**
	 * Writes the given text to a file, closing the writer even if writing fails.
	 *
	 * @param filename name of the file to create or overwrite
	 * @param content  text to write
	 * @throws IOException if the file cannot be opened, written or closed
	 */
	private static void writeTextFile(String filename, String content) throws IOException {
		BufferedWriter out = new BufferedWriter(new FileWriter(filename));
		try {
			out.write(content);
		} finally {
			out.close();
		}
	}

}
